In [61]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
# from Index_Calculations import facility_index_gen
import os
import glob
import numpy as np
from statsmodels.tsa.tsatools import detrend
from os.path import join
import itertools
In [ ]:
# Load the plant -> NERC region lookup table and give its three columns the
# names used by the rest of the notebook.
path = os.path.join('Data storage', 'Plant NERC regions.csv')
NERC_labels = pd.read_csv(path)
NERC_labels.columns = ['plant id', 'region', 'year']
In [3]:
# Input files and the output folder, all relative to the 'Data storage' folder.
facility_path = os.path.join('Data storage', 'Facility gen fuels and CO2 2017-05-25.zip')
epa_path = os.path.join('Data storage', 'Derived data',
'Monthly EPA emissions 2017-05-25.csv')
ef_path = os.path.join('Data storage', 'Final emission factors.csv')
# Folder that receives the region-level index/generation files
out_folder = os.path.join('Data storage', 'final NERC data')
In [4]:
# Read the facility-level and EPA inputs.
# NOTE(review): facility_path points at a .zip file, so this presumably relies
# on pandas inferring the compression from the extension - confirm.
facility = pd.read_csv(facility_path)
epa = pd.read_csv(epa_path)
In [5]:
# Relabel ERCOT as TRE, the abbreviation used for this region in the region
# lists elsewhere in the notebook.
NERC_labels.loc[NERC_labels['region'] == 'ERCOT', 'region'] = 'TRE'
In [20]:
# Run facility_index_gen once per region; each call is given the export folder
# and a filename suffix of ' ' + region abbreviation.
# NOTE(review): the import of facility_index_gen is commented out in the
# import cell, so this cell raises NameError on a fresh kernel unless that
# import is restored.
for nerc in ['USA', 'TRE', 'FRCC', 'NPCC', 'RFC', 'SPP',
'SERC', 'WECC', 'ASCC', 'MRO', 'HICC']:
# Only use data from 2015 and earlier (final EIA data)
facility_index_gen(eia_facility=facility.loc[(facility['year'] <= 2015)],
epa=epa,
emission_factor_path=ef_path,
facility_regions=NERC_labels,
export_folder=out_folder, export_path_ext=' '
+ nerc, region=nerc)
In [2]:
# Collect the paths of the monthly index files and the monthly generation
# files in the final NERC data folder. `path` is reused for both patterns.
path = os.path.join('Data storage', 'final NERC data', 'Monthly index*')
mi_fns = glob.glob(path)
path = os.path.join('Data storage', 'final NERC data', 'Monthly gen*')
mg_fns = glob.glob(path)
In [3]:
def make_gen_index(data_folder, regions, time='Monthly'):
    """
    Read and combine the region-level generation and index files.

    The region of each file is parsed from its own filename (the last
    whitespace-separated token before the extension) instead of being paired
    positionally with `regions`: glob returns files in arbitrary order and the
    folder may hold files for regions that were not requested, so positional
    pairing can attach the wrong label to a file.

    inputs:
        data_folder: (str or path) path to the region-level data files
        regions: (list of str) region abbreviations to include; files whose
            parsed region is not in this list are ignored
        time: (str) "Monthly", "Quarterly", or "Annual"

    outputs:
        gen_index: (df) a dataframe with combined generation and index data:
            one row per region and time period, one column per fuel category
            (missing values filled with 0) plus 'index (g/kWh)'

    raises:
        ValueError: if no index file or no generation file matches `regions`
    """
    index_path = os.path.join(data_folder, '{} index*'.format(time))
    gen_path = os.path.join(data_folder, '{} gen*'.format(time))

    def region_from_filename(path):
        # Same convention the other cells of this notebook use to parse the
        # region: last whitespace-separated token, extension removed.
        stem = os.path.splitext(os.path.basename(path))[0]
        return stem.split()[-1]

    # Read and combine files into single dataframes
    def read_combine(pattern, regions):
        df_list = []
        # Sorted only to make the read order deterministic
        for f in sorted(glob.glob(pattern)):
            region = region_from_filename(f)
            if region not in regions:
                continue
            df = pd.read_csv(f)
            df['region'] = region
            df_list.append(df)
        if not df_list:
            raise ValueError(
                'No files matching {!r} for regions {!r}'.format(pattern,
                                                                 list(regions)))

        full_df = pd.concat(df_list)
        full_df.reset_index(inplace=True, drop=True)
        full_df.rename(columns={'year': 'Year'}, inplace=True)

        if 'datetime' in full_df.columns:
            full_df['datetime'] = pd.to_datetime(full_df['datetime'])

        return full_df

    index_df = read_combine(index_path, regions)
    gen_df = read_combine(gen_path, regions)

    # Different timescales will pivot on different columns
    gen_pivot_col = {'Monthly': ['Year', 'datetime'],
                     'Quarterly': ['Year', 'year_quarter'],
                     'Annual': ['Year']}

    # One row per region/time period, one column per fuel category
    gen_pivot = pd.pivot_table(gen_df, index=['region'] + gen_pivot_col[time],
                               values='generation (MWh)',
                               columns='fuel category')
    gen_pivot.reset_index(inplace=True, drop=False)
    # A fuel with no rows for a region/period means no generation
    gen_pivot.replace(np.nan, 0, inplace=True)

    # Merge gen and index dataframes
    merge_col = {'Monthly': ['datetime'],
                 'Quarterly': ['year_quarter'],
                 'Annual': ['Year']}

    gen_index = pd.merge(gen_pivot,
                         index_df[merge_col[time] + ['region', 'index (g/kWh)']],
                         on=merge_col[time] + ['region'])

    return gen_index
In [4]:
# Build the combined annual generation + index table for the regions of
# interest. The list omits ASCC and HICC, which the generation loop also
# writes files for.
data_path = os.path.join('Data storage', 'final NERC data')
# gen_index = make_gen_index(data_path, time='Monthly')
regions = ['USA', 'SPP', 'MRO', 'RFC', 'SERC', 'TRE', 'FRCC', 'WECC', 'NPCC']
annual_gen_index = make_gen_index(data_path, regions, time='Annual')
In [5]:
# Read every monthly index file and stack them into one frame, tagging each
# file's rows with the region parsed from its name.
df_list = []
for f in mi_fns:
    # Last whitespace-separated token of the path minus its final four
    # characters (assumes a 4-character extension such as ".csv")
    region = f.split()[-1][:-4]
    df = pd.read_csv(f)
    df['region'] = region
    df_list.append(df)

full_mi = (pd.concat(df_list)
           .reset_index(drop=True)
           .rename(columns={'index (g/kWh)': 'monthly index (g/kWh)'}))
full_mi['datetime'] = pd.to_datetime(full_mi['datetime'])
In [6]:
# Read every monthly generation file, tag it with its region, and stack them.
df_list = []
for f in mg_fns:
    # Last whitespace-separated token of the path minus its final four
    # characters (assumes a 4-character extension such as ".csv")
    region = f.split()[-1][:-4]
    df = pd.read_csv(f)
    df['region'] = region
    df_list.append(df)

full_mg = pd.concat(df_list).reset_index(drop=True)
full_mg['datetime'] = pd.to_datetime(full_mg['datetime'])

# Wide table: one row per region/month, one column per fuel category
monthly_gen = (full_mg
               .pivot_table(index=['region', 'datetime'],
                            values='generation (MWh)',
                            columns='fuel category')
               .reset_index(drop=False))
monthly_gen['Year'] = monthly_gen['datetime'].dt.year
# Fuels with no rows for a region/month become 0
monthly_gen = monthly_gen.replace(np.nan, 0)
In [7]:
# Attach the monthly index to the wide generation table (inner join on
# month and region).
gen_index = monthly_gen.merge(
    full_mi[['datetime', 'region', 'monthly index (g/kWh)']],
    on=['datetime', 'region'])
gen_index.head()
Out[7]:
A large number of facilities appear to be missing from the data before 2002, so 2001 is dropped for now.
In [8]:
# Drop 2001. Take an explicit copy: later cells assign new columns into
# gen_index, and assigning into a filtered slice triggers pandas'
# SettingWithCopyWarning.
gen_index = gen_index.loc[gen_index['Year'] >= 2002].copy()
In [9]:
def mad(x):
    "Mean absolute deviation of x around its own mean"
    return np.fabs(x - x.mean()).mean()


# 12-month rolling variability of the monthly index, computed separately for
# each region so a window never spans two regions.
for region in gen_index['region'].unique():
    rows = gen_index['region'] == region
    rolling_index = gen_index.loc[rows, 'monthly index (g/kWh)'].rolling(window=12)
    rolling_mean = rolling_index.mean()

    # Rolling standard deviation, raw and normalized by the rolling mean
    gen_index.loc[rows, 'Index variability'] = rolling_index.std()
    gen_index.loc[rows, 'Normalized Index variability'] = (
        gen_index.loc[rows, 'Index variability'] / rolling_mean)

    # Same two measures using the mean absolute deviation (mad)
    gen_index.loc[rows, 'Index variability (MAD)'] = rolling_index.apply(mad)
    gen_index.loc[rows, 'Normalized Index variability (MAD)'] = (
        gen_index.loc[rows, 'Index variability (MAD)'] / rolling_mean)

gen_index.tail()
Out[9]:
In [10]:
# Total generation per row is the sum across all fuel-category columns
fuels = ['Coal', 'Natural Gas', 'Nuclear', 'Hydro', 'Wind',
         'Solar', 'Other', 'Other Renewables']
gen_index['Total Generation (MWh)'] = gen_index[fuels].sum(axis=1)
In [11]:
gen_index.head()
Out[11]:
I'm going to skip this for now; it is difficult to choose a single base year that works for all regions.
In [27]:
base_year = 2007
In [28]:
fuels = ['Coal', 'Natural Gas', 'Other Renewables', 'Nuclear', 'Other',
         'Solar', 'Wind', 'Hydro']
gen_index['Total gen'] = gen_index[fuels].sum(axis=1)

# Row selectors reused below; 'region' and 'Year' are not modified here
in_base_year = gen_index['Year'] == base_year
region_names = gen_index['region'].unique()

for fuel in fuels:
    # New columns that are being added
    col_percent = 'percent ' + fuel
    col_change = 'change in ' + fuel

    # Share of each row's generation that comes from this fuel
    gen_index[col_percent] = gen_index[fuel] / gen_index['Total gen']

    for region in region_names:
        in_region = gen_index['region'] == region
        base_rows = in_base_year & in_region

        # Generation-weighted share of this fuel over the whole base year
        percent_fuel_base = (gen_index.loc[base_rows, fuel].sum()
                             / gen_index.loc[base_rows, 'Total gen'].sum())

        # Relative change of every region/month against the base-year share
        gen_index.loc[in_region, col_change] = (
            (gen_index.loc[in_region, col_percent] - percent_fuel_base)
            / percent_fuel_base)

# Change in variability compared to the average base-year value
for region in region_names:
    in_region = gen_index['region'] == region
    base_rows = in_base_year & in_region

    norm_variability_base = gen_index.loc[base_rows,
                                          'Normalized Index variability'].mean()
    variability_base = gen_index.loc[base_rows, 'Index variability'].mean()

    gen_index.loc[in_region, 'change in variability'] = (
        (gen_index.loc[in_region, 'Index variability'] - variability_base)
        / variability_base)
    gen_index.loc[in_region, 'change in norm variability'] = (
        (gen_index.loc[in_region, 'Normalized Index variability']
         - norm_variability_base)
        / norm_variability_base)
In [14]:
def weighted_percent(df, fuel, year):
    """
    Generation-weighted percent of total generation from `fuel` in `year`.

    inputs:
        df: (df) dataframe with a 'Year' column and one generation column per
            fuel category
        fuel: (str) name of the fuel column of interest
        year: value matched against the 'Year' column

    outputs:
        weighted_per: fuel generation summed over the year divided by total
            generation summed over the year, times 100
    """
    all_fuels = ['Coal', 'Natural Gas', 'Nuclear', 'Other', 'Other Renewables',
                 'Wind', 'Solar', 'Hydro']
    year_gen = df.loc[df['Year'] == year, all_fuels]
    # Sum the totals directly instead of adding a 'Total' column to the
    # slice, which raises pandas' SettingWithCopyWarning.
    total_gen = year_gen.sum(axis=1).sum()
    weighted_per = year_gen[fuel].sum() / total_gen * 100
    return weighted_per
Because I've only used facility data, values are most accurate through the end of 2015. After that we lose data in some regions, like most of the hydro in ERCOT. This has a big influence on results.
The rolling standard deviation and the rolling mean absolute deviation look almost the same. There aren't enough outliers to change the results much.
In [12]:
# Global seaborn look for the figures below: white style with thicker axes
# lines and a grid, notebook context with enlarged fonts.
sns.set_style('white', {'axes.linewidth': 1.5,
'axes.grid': True})
sns.set_context('notebook', font_scale=1.2)
In [14]:
def region_facet_grid(df, plot_function, x_axis, y_axis, col_order=None,
                      suptitle='', add_legend=False, ax_labels=None,
                      FG_kwargs={}, plot_kwargs={}, context='notebook',
                      font_scale=1.2):
    """
    Draw a seaborn FacetGrid of `y_axis` against `x_axis`.

    inputs:
        df: (df) data passed to sns.FacetGrid
        plot_function: callable mapped onto every facet (e.g. plt.plot)
        x_axis, y_axis: (str) column names passed to FacetGrid.map
        col_order: (list) facet order; also used as the facet titles when
            'col' is one of the FG_kwargs
        suptitle: (str) figure title, skipped when empty
        add_legend: (bool) add the FacetGrid legend
        ax_labels: axis labels; a list/tuple is unpacked as (x label, y label),
            any other value is passed through as the x label
        FG_kwargs: (dict) extra keyword arguments for sns.FacetGrid
        plot_kwargs: (dict) extra keyword arguments for FacetGrid.map
        context, font_scale: passed to sns.set_context

    outputs:
        None; the figure is drawn as a side effect.
    """
    # The dict defaults are only read (unpacked), never mutated.
    sns.set_context(context, font_scale)
    g = sns.FacetGrid(df, col_order=col_order, **FG_kwargs)
    g.map(plot_function, x_axis, y_axis, **plot_kwargs)
    g.set_xticklabels(rotation=35)
    if add_legend:
        g.add_legend()
    if suptitle:
        plt.suptitle(suptitle, y=1.02, size=15)
    if col_order and 'col' in FG_kwargs:
        # Use the col_order argument for titles. The original read the
        # notebook-level variable `order`, which only worked when the caller
        # happened to have a global of that name with the same contents.
        for ax, title in zip(g.axes.flatten(), col_order):
            ax.set_title(title)
    if ax_labels:
        if isinstance(ax_labels, (list, tuple)):
            # set_axis_labels takes the x and y labels as separate arguments
            g.set_axis_labels(*ax_labels)
        else:
            g.set_axis_labels(ax_labels)
In [63]:
# One facet per region: 12-month rolling standard deviation of the index
order = ['USA', 'SPP', 'MRO', 'RFC', 'SERC', 'TRE', 'FRCC', 'WECC', 'NPCC']
FG_kwargs = {'col': 'region',
             'hue': 'region',
             'col_wrap': 3,
             'aspect': 1.2,
             'hue_order': order}
region_facet_grid(gen_index, plt.plot, 'datetime', 'Index variability',
                  col_order=order, suptitle='Index Variability',
                  FG_kwargs=FG_kwargs)
In [64]:
# One facet per region: rolling standard deviation divided by the rolling mean
order = ['USA', 'SPP', 'MRO', 'RFC', 'SERC', 'TRE', 'FRCC', 'WECC', 'NPCC']
FG_kwargs = {'col': 'region',
             'hue': 'region',
             'col_wrap': 3,
             'aspect': 1.2,
             'hue_order': order}
region_facet_grid(gen_index, plt.plot, 'datetime',
                  'Normalized Index variability', col_order=order,
                  suptitle='Normalized Index Variability',
                  FG_kwargs=FG_kwargs)
In [18]:
# One facet per region: the monthly index itself, on a shared y range,
# saved to the Figures folder
order = ['USA', 'SPP', 'MRO', 'RFC', 'SERC', 'TRE', 'FRCC', 'WECC', 'NPCC']
FG_kwargs = {'col': 'region',
             'hue': 'region',
             'col_wrap': 3,
             'aspect': 1.2,
             'hue_order': order,
             'ylim': (0, 1050)}
region_facet_grid(gen_index, plt.plot, 'datetime', 'monthly index (g/kWh)',
                  col_order=order, suptitle='Monthly Index',
                  FG_kwargs=FG_kwargs, context='talk', font_scale=1)
plt.savefig(join('Figures', 'Monthly index.pdf'))
In [22]:
# Calendar month and year of each row. The lower-case 'year' holds the same
# datetime-derived value as the existing 'Year' column; later cells use
# 'month'/'year' as plot variables and merge keys.
gen_index['month'] = gen_index['datetime'].dt.month
gen_index['year'] = gen_index['datetime'].dt.year
View CO2 intensity by month, coding the year as a categorical variable.
In [38]:
# Monthly index by calendar month, one line per year (hue), one facet per
# region; each facet gets a title from `order` and a 0-1050 y range, then the
# figure is saved to the Figures folder.
order = ['USA', 'SPP', 'MRO', 'RFC', 'SERC', 'TRE', 'FRCC', 'WECC', 'NPCC']
# NOTE(review): `temp` is not used anywhere in this cell (the plot reads
# gen_index directly).
temp = gen_index.copy()
with sns.plotting_context('poster'):
g = sns.factorplot(x='month', y='monthly index (g/kWh)', hue='year',
data=gen_index, col='region', col_wrap=3, col_order=order,
palette='viridis_r', scale=0.5)
# plt.suptitle('CO$_2$ Intensity', y=1.02, size=15)
axes = g.axes.flatten()
for ax, title in zip(axes, order):
ax.set_title(title)
ax.set_ylim(0, 1050)
plt.savefig(join('Figures', 'Monthly index (12-months).pdf'))
Out[38]:
Out[38]:
Out[38]:
Out[38]:
Out[38]:
Out[38]:
Out[38]:
Out[38]:
Out[38]:
Out[38]:
Out[38]:
Out[38]:
Out[38]:
Out[38]:
Out[38]:
Out[38]:
Out[38]:
Out[38]:
In [19]:
# Linearly detrend each region's monthly index and plot one facet per region.
order = ['USA', 'SPP', 'MRO', 'RFC', 'SERC', 'TRE', 'FRCC', 'WECC', 'NPCC']
FG_kwargs = dict(hue='region',
                 col='region',
                 col_wrap=3,
                 aspect=1.2,
                 hue_order=order,
                 ylim=(-150, 150))

detrend_df = gen_index.loc[gen_index['region'].isin(order)].copy()
for col in order:
    # Detrend within each region so one region's trend does not leak into
    # another's
    detrend_df.loc[detrend_df['region'] == col,
                   'monthly index (g/kWh)'] = detrend(
        detrend_df.loc[detrend_df['region'] == col, 'monthly index (g/kWh)'])

# The original also computed `corr_coef` here as element [1, 0] of the
# nine-region correlation matrix. That value was never used in this cell, and
# every later cell that prints corr_coef recomputes it first, so it is removed.
region_facet_grid(df=detrend_df, plot_function=plt.plot, x_axis='datetime',
                  add_legend=False, y_axis='monthly index (g/kWh)',
                  col_order=order, suptitle='Monthly Index (detrended)',
                  FG_kwargs=FG_kwargs)
In [213]:
def shift_detrend(series, n):
    """
    Difference a series against itself n periods earlier.

    inputs:
        series: (pd.Series) values to difference
        n: (int) number of periods to shift by

    outputs:
        The series minus the same series shifted by n periods; positions with
        no value n periods earlier are NaN.
    """
    return series.sub(series.shift(n))
In [233]:
# Three ways of detrending the MRO monthly index: linear fit (a), 1-month
# difference (b) and 12-month difference (c)
mro_index = gen_index.loc[gen_index['region'] == 'MRO', 'monthly index (g/kWh)']
a = detrend(mro_index)
b = shift_detrend(mro_index, 1)
c = shift_detrend(mro_index, 12)
In [230]:
import plotly
import plotly.tools as tls
import plotly.plotly as py
import plotly.graph_objs as go
from plotly.offline import iplot
plotly.offline.init_notebook_mode(connected=True)
In [234]:
# Interactive comparison of the three detrended MRO series (a, b, c)
time = gen_index.loc[gen_index['region'] == 'MRO', 'datetime']

data = [go.Scatter(y=series, x=time, name=label)
        for series, label in [(a, 'Linear detrend'),
                              (b, 'First differences 1-month'),
                              (c, 'First differences 12-months')]]
iplot(data)
In [159]:
# Monthly index of FRCC and SPP on one axis, annotated with their Pearson
# correlation.
order = ['FRCC', 'SPP']
FG_kwargs = dict(hue='region',
                 aspect=1.5,
                 size=5,
                 hue_order=order,
                 ylim=(200, 1050))

# gen_index holds every region, so the correlation matrix has one row/column
# per region. Select the plotted pair by label: positional [1, 0] picks
# whichever two regions sort first, not necessarily FRCC and SPP.
corr_coef = (gen_index.pivot_table(values='monthly index (g/kWh)',
                                   index='datetime', columns='region')
             .corr()
             .loc[order[0], order[1]])

region_facet_grid(df=gen_index, plot_function=plt.plot, x_axis='datetime',
                  add_legend=True, y_axis='monthly index (g/kWh)',
                  col_order=order, suptitle='Monthly Index',
                  FG_kwargs=FG_kwargs)
plt.figtext(.55, .85, 'Pearson corr = {:.2f}'.format(corr_coef))
Out[159]:
In [160]:
# Linearly detrended monthly index of FRCC and SPP on one axis, annotated
# with the Pearson correlation of the detrended series.
order = ['FRCC', 'SPP']
FG_kwargs = {'hue': 'region',
             'aspect': 1.5,
             'size': 5,
             'hue_order': order,
             'ylim': (-200, 200)}

detrend_df = gen_index.loc[gen_index['region'].isin(order)].copy()
for col in order:
    rows = detrend_df['region'] == col
    detrend_df.loc[rows, 'monthly index (g/kWh)'] = detrend(
        detrend_df.loc[rows, 'monthly index (g/kWh)'])

# Only the two regions in `order` remain, so element [1, 0] of the 2x2
# matrix is their pairwise correlation
corr_coef = (detrend_df.pivot_table(values='monthly index (g/kWh)',
                                    index='datetime', columns='region')
             .corr()
             .iloc[1, 0])

region_facet_grid(detrend_df, plt.plot, 'datetime', 'monthly index (g/kWh)',
                  col_order=order, suptitle='Monthly Index (detrended)',
                  add_legend=True, FG_kwargs=FG_kwargs)
plt.figtext(.55, .85, 'Pearson corr = {:.2f}'.format(corr_coef))
Out[160]:
In [214]:
# Same FRCC/SPP comparison, detrended by 1-month differencing instead of a
# linear fit.
order = ['FRCC', 'SPP']
FG_kwargs = {'hue': 'region',
             'aspect': 1.5,
             'size': 5,
             'hue_order': order,
             'ylim': (-200, 200)}

detrend_df = gen_index.loc[gen_index['region'].isin(order)].copy()
for col in order:
    rows = detrend_df['region'] == col
    detrend_df.loc[rows, 'monthly index (g/kWh)'] = shift_detrend(
        detrend_df.loc[rows, 'monthly index (g/kWh)'], 1)

# Only the two regions in `order` remain, so element [1, 0] of the 2x2
# matrix is their pairwise correlation
corr_coef = (detrend_df.pivot_table(values='monthly index (g/kWh)',
                                    index='datetime', columns='region')
             .corr()
             .iloc[1, 0])

region_facet_grid(detrend_df, plt.plot, 'datetime', 'monthly index (g/kWh)',
                  col_order=order, suptitle='Monthly Index (detrended)',
                  add_legend=True, FG_kwargs=FG_kwargs)
plt.figtext(.55, .85, 'Pearson corr = {:.2f}'.format(corr_coef))
Out[214]:
In [55]:
# Wide table of the monthly index (one column per region) from 2006 onward,
# plus a linearly detrended copy of every column.
regions = ['USA', 'SPP', 'MRO', 'RFC', 'SERC', 'TRE', 'FRCC', 'WECC', 'NPCC']
nerc_index = gen_index.loc[gen_index['region'].isin(regions) &
                           (gen_index['Year'] >= 2006)]
nerc_index = nerc_index.pivot_table(values='monthly index (g/kWh)',
                                    index='datetime', columns='region')

nerc_index_detrend = pd.DataFrame(index=nerc_index.index)
for col in nerc_index.columns:
    nerc_index_detrend[col] = detrend(nerc_index[col])
In [56]:
nerc_index_detrend.head()
Out[56]:
Compare the correlations across three periods (2007-2009, 2010-2012, and 2013 onward) to see if there is a difference.
In [58]:
# Correlation between the detrended regional indexes, 2007 through 2009
# (USA, the first entry of `order`, is left out)
order = ['USA', 'SPP', 'MRO', 'RFC', 'SERC', 'TRE', 'FRCC', 'WECC', 'NPCC']
corr = nerc_index_detrend.loc[(nerc_index_detrend.index.year >= 2007) &
                              (nerc_index_detrend.index.year <= 2009),
                              order[1:]].corr()

# Hide the diagonal and upper triangle of the symmetric matrix
mask = np.zeros_like(corr)
mask[np.triu_indices_from(mask)] = True

sns.heatmap(corr, mask=mask, vmax=0.9, square=True, cmap='magma_r',
            annot=True, annot_kws={'size': 10})
plt.title('2007-2009')
plt.savefig(join('Figures', '2007-2009 correlation.pdf'), bbox_inches='tight')
Out[58]:
Out[58]:
In [59]:
# Correlation between the detrended regional indexes, 2010 through 2012
# (USA, the first entry of `order`, is left out)
order = ['USA', 'SPP', 'MRO', 'RFC', 'SERC', 'TRE', 'FRCC', 'WECC', 'NPCC']
corr = nerc_index_detrend.loc[(nerc_index_detrend.index.year >= 2010) &
                              (nerc_index_detrend.index.year <= 2012),
                              order[1:]].corr()

# Hide the diagonal and upper triangle of the symmetric matrix
mask = np.zeros_like(corr)
mask[np.triu_indices_from(mask)] = True

sns.heatmap(corr, mask=mask, vmax=0.9, square=True, cmap='magma_r',
            annot=True, annot_kws={'size': 10})
plt.title('2010-2012')
plt.savefig(join('Figures', '2010-2012 correlation.pdf'), bbox_inches='tight')
Out[59]:
Out[59]:
In [60]:
# Correlation between the detrended regional indexes from 2013 onward
# (USA, the first entry of `order`, is left out)
order = ['USA', 'SPP', 'MRO', 'RFC', 'SERC', 'TRE', 'FRCC', 'WECC', 'NPCC']
corr = nerc_index_detrend.loc[nerc_index_detrend.index.year >= 2013,
                              order[1:]].corr()

# Hide the diagonal and upper triangle of the symmetric matrix
mask = np.zeros_like(corr)
mask[np.triu_indices_from(mask)] = True

sns.heatmap(corr, mask=mask, vmax=0.9, square=True, cmap='magma_r',
            annot=True, annot_kws={'size': 10})
plt.title('2013-2015')
plt.savefig(join('Figures', '2013-2015 correlation.pdf'), bbox_inches='tight')
Out[60]:
Out[60]:
In [87]:
import itertools
In [85]:
def nerc_correlation(df, regions=[], years=[], detrend=True):
    """
    Correlation of the monthly index between regions for the given years.

    Pass in non-detrended data; detrending is applied to just the years of
    interest.

    inputs:
        df: (df) dataframe with 'region', 'Year', 'datetime' and
            'monthly index (g/kWh)' columns
        regions: (list-like of str) regions to compare; with two regions the
            returned value is their pairwise correlation
        years: (list-like) values of 'Year' to keep
        detrend: (bool) linearly detrend each region's index first

    outputs:
        Element [1, 0] of the region-by-region correlation matrix.
    """
    index_col = 'monthly index (g/kWh)'
    df_temp = df.loc[(df['region'].isin(regions)) &
                     (df['Year'].isin(years))].copy()

    if detrend:
        # The boolean `detrend` parameter shadows the statsmodels function of
        # the same name inside this function, so calling detrend(...) here
        # would call True(...). Import the function under another name.
        from statsmodels.tsa.tsatools import detrend as linear_detrend
        for region in regions:
            in_region = df_temp['region'] == region
            df_temp.loc[in_region, index_col] = linear_detrend(
                df_temp.loc[in_region, index_col])

    corr_df = df_temp.pivot_table(values=index_col,
                                  index='datetime', columns='region').corr()
    return corr_df.iloc[1, 0]
In [95]:
# Year-by-year correlation of the (non-detrended) monthly index for every
# ordered pair of regions, one small dataframe per pair.
years = range(2006, 2016)
df_list = []
# `order[1:]` drops the first entry of `order` before pairing
for regions in itertools.permutations(order[1:], 2):
    corr_df = pd.DataFrame(index=years,
                           columns=['Year', 'Region1', 'Region2', 'Correlation'])
    corr_df['Year'] = years
    corr_df['Region1'], corr_df['Region2'] = regions[0], regions[1]

    for year in years:
        result = nerc_correlation(gen_index, regions=regions, years=[year],
                                  detrend=False)
        corr_df.loc[year, 'Correlation'] = result

    df_list.append(corr_df)
In [96]:
# Grid of yearly correlations: one row per first region, one column per
# second region, correlation plotted against year.
g = sns.FacetGrid(pd.concat(df_list), row='Region1', col='Region2')
g.map(plt.plot, 'Year', 'Correlation')
Out[96]:
In [29]:
# Long-format table of each fuel's share of generation: one row per
# region / month / fuel.
all_fuels = ['Coal', 'Natural Gas', 'Nuclear', 'Other', 'Other Renewables',
             'Wind', 'Solar', 'Hydro']
value_cols = ['percent {}'.format(fuel) for fuel in all_fuels]
percent_gen_df = pd.melt(gen_index, id_vars=['region', 'datetime'],
                         value_vars=value_cols, value_name='percent generation',
                         var_name='fuel')

# Recover the fuel name by removing the 'percent ' prefix. The original kept
# only the last word and then patched the two-word names back with in-place
# replace calls on a column selection, which is not guaranteed to write
# through to the dataframe.
percent_gen_df['fuel'] = percent_gen_df['fuel'].map(
    lambda x: x[len('percent '):])
In [30]:
gen_index.head()
Out[30]:
In [31]:
percent_gen_df.head()
Out[31]:
In [32]:
# Share of generation by fuel over time, one facet per region, saved to the
# Figures folder
order = ['USA', 'SPP', 'MRO', 'RFC', 'SERC', 'TRE', 'FRCC', 'WECC', 'NPCC']
fuel_order = ['Coal', 'Natural Gas', 'Nuclear', 'Hydro', 'Wind', 'Solar',
              'Other', 'Other Renewables']
FG_kwargs = {'hue': 'fuel',
             'col': 'region',
             'col_wrap': 3,
             'aspect': 1.2,
             'hue_order': fuel_order}
region_facet_grid(percent_gen_df, plt.plot, 'datetime', 'percent generation',
                  col_order=order, suptitle='', add_legend=True,
                  FG_kwargs=FG_kwargs, context='talk', font_scale=1)
plt.savefig(join('Figures', 'Generation by fuel.pdf'))
In [46]:
# Calendar month and year of each row, used as plot variables and merge keys
# in the cells below.
percent_gen_df['month'] = percent_gen_df['datetime'].dt.month
percent_gen_df['year'] = percent_gen_df['datetime'].dt.year
In [47]:
percent_gen_df.head()
Out[47]:
All regions, from 2013 through 2015
In [35]:
order = ['USA', 'SPP', 'MRO', 'RFC', 'SERC', 'TRE', 'FRCC', 'WECC', 'NPCC']
fuels = ['Coal', 'Natural Gas', 'Nuclear', 'Hydro', 'Wind', 'Solar']
# Keep 2013 onward, matching the section heading. The original used >= 2012,
# which put 2012 in both this plot and the "before 2013" plot.
temp_df = percent_gen_df.loc[(percent_gen_df.fuel.isin(fuels)) &
                             (percent_gen_df.datetime.dt.year >= 2013)]
sns.factorplot(x='month', y='percent generation', hue='fuel', data=temp_df,
               col='region', col_wrap=3, col_order=order, hue_order=fuels)
Out[35]:
All regions, before 2013
In [36]:
# Fuel share by calendar month for the years up to and including 2012
order = ['USA', 'SPP', 'MRO', 'RFC', 'SERC', 'TRE', 'FRCC', 'WECC', 'NPCC']
fuels = ['Coal', 'Natural Gas', 'Nuclear', 'Hydro', 'Wind', 'Solar']
temp_df = percent_gen_df.loc[percent_gen_df['fuel'].isin(fuels) &
                             (percent_gen_df['datetime'].dt.year <= 2012)]
sns.factorplot(data=temp_df, x='month', y='percent generation', hue='fuel',
               hue_order=fuels, col='region', col_order=order, col_wrap=3)
Out[36]:
Only coal, NG, and wind in SPP. All years in each facet
In [49]:
# Coal, natural gas and wind share in SPP by calendar month: one facet per
# fuel, one line per year
order = ['SPP']
fuels = ['Coal', 'Natural Gas', 'Nuclear', 'Hydro', 'Wind', 'Solar']
temp_df = percent_gen_df.loc[
    percent_gen_df['fuel'].isin(['Wind', 'Coal', 'Natural Gas']) &
    percent_gen_df['region'].isin(order)]
sns.factorplot(data=temp_df, x='month', y='percent generation', hue='year',
               col='fuel', palette='viridis')
Out[49]:
In [109]:
# Coal, natural gas and wind shares for SPP, TRE and MRO from 2007 onward.
# The explicit copy lets the following statements add columns safely.
order = ['SPP', 'TRE', 'MRO']
# NOTE(review): `fuels` is not used by the filter below, which lists its
# three fuels inline.
fuels = ['Coal', 'Natural Gas', 'Nuclear', 'Hydro', 'Wind', 'Solar']
temp_df = percent_gen_df.loc[(percent_gen_df.fuel.isin(['Wind', 'Coal', 'Natural Gas'])) &
(percent_gen_df.region.isin(order)) &
(percent_gen_df.year >=2007)].copy()
def early_late(x):
    """
    Label a year with the timeframe it falls in.

    inputs:
        x: year (number)

    outputs:
        'Early' for years before 2010, 'Mid' for 2010 through 2012,
        'Late' for 2013 and later

    raises:
        ValueError: if x matches none of the comparisons (e.g. NaN)
    """
    if x < 2010:
        return 'Early'
    elif x < 2013:
        return 'Mid'
    elif x >= 2013:
        return 'Late'
    else:
        # Raise the error; the original returned the ValueError class itself,
        # which would have been stored silently as a timeframe label.
        raise ValueError('Cannot assign a timeframe to {!r}'.format(x))
# Label every row with its timeframe
temp_df['timeframe'] = temp_df['year'].map(early_late)

# Bring in total generation for each region/month so the fuel share can be
# converted back to generation, expressed in millions of MWh
merge_cols = ['region', 'month', 'year']
temp_df = temp_df.merge(gen_index[merge_cols + ['Total gen']], on=merge_cols)
temp_df['generation'] = (temp_df['percent generation']
                         * temp_df['Total gen'] / 1e6)
In [110]:
# a is the list of colors to get wind purple
a = sns.color_palette('tab10')[0:2]
a.append(sns.color_palette('tab10')[4])
In [111]:
# Grid of fuel-share curves by calendar month: one row per region, one column
# per timeframe, one trace per year (unit) and one colour per fuel (condition),
# using the palette `a`.
with sns.plotting_context(font_scale=1.2):
g = sns.FacetGrid(temp_df, col='timeframe', row='region',
col_order=['Early', 'Mid', 'Late'], row_order=order)
g.map_dataframe(sns.tsplot, time='month', unit='year', value='percent generation',
condition='fuel', err_style='unit_traces', color=a).add_legend()
g.set_axis_labels('Month', 'Percent Generation')
# One title per facet, "<region>, <period>". The loop variables are no longer
# called a and b: Python 2 list-comprehension variables leak into the notebook
# namespace, so the original overwrote the colour palette `a` with a region name.
titles = ['{}, {}'.format(region_name, period) for (region_name, period) in
          itertools.product(order, ['2007-2009', '2010-2012', '2013-2015'])]
# Apply the titles to the facets in flattened grid order
axes = g.axes.flatten()
for ax, title in zip(axes, titles):
ax.set_title(title)
# plt.savefig(join('Figures', 'Adding wind.pdf'), bbox_inches='tight')
Out[111]:
Out[111]:
Out[111]:
Out[111]:
Out[111]:
Out[111]:
Out[111]:
Out[111]:
Out[111]:
Out[111]:
Out[111]:
In [100]:
temp_df.groupby(['region', 'fuel', 'timeframe']).mean()
Out[100]:
In [113]:
# Same grid as the percent-generation figure, but plotting the 'generation'
# column and labelling the y axis in million MWh. Colours come from the
# palette `a`.
with sns.plotting_context(font_scale=1.2):
g = sns.FacetGrid(temp_df, col='timeframe', row='region',
col_order=['Early', 'Mid', 'Late'], row_order=order)
g.map_dataframe(sns.tsplot, time='month', unit='year', value='generation',
condition='fuel', err_style='unit_traces', color=a).add_legend()
g.set_axis_labels('Month', 'Million MWh')
# One title per facet, "<region>, <period>". The loop variables are no longer
# called a and b: Python 2 list-comprehension variables leak into the notebook
# namespace, so the original overwrote the colour palette `a` with a region name.
titles = ['{}, {}'.format(region_name, period) for (region_name, period) in
          itertools.product(order, ['2007-2009', '2010-2012', '2013-2015'])]
# Apply the titles to the facets in flattened grid order and put x ticks at
# months 1, 4, 7 and 10
axes = g.axes.flatten()
for ax, title in zip(axes, titles):
ax.set_title(title)
ax.set_xticks([1, 4, 7, 10])
# plt.savefig(join('Figures', 'Adding wind.pdf'), bbox_inches='tight')
Out[113]:
Out[113]:
Out[113]:
Out[113]:
Out[113]:
Out[113]:
Out[113]:
Out[113]:
Out[113]:
Out[113]:
Out[113]:
Out[113]:
Out[113]:
Out[113]:
Out[113]:
Out[113]:
Out[113]:
Out[113]:
Out[113]:
Out[113]:
Look at the growth in average monthly wind generation across the three timeframes, and the change in natural gas/coal generation.
In [103]:
temp_df.groupby(['region', 'fuel', 'timeframe']).mean()
Out[103]:
In [126]:
# Standard deviation of each column of temp_df per region / fuel / timeframe,
# with 'generation' plotted against timeframe as a measure of variability.
# Timeframes are mapped to 0, 0.5 and 1 so they sort and plot in order.
map_timeframe = {'Early': 0,
'Mid': 0.5,
'Late': 1}
data = (temp_df.groupby(['region', 'fuel', 'timeframe'])
.std()
.reset_index())
data['frame'] = data['timeframe'].map(map_timeframe)
data.sort_values(['region', 'fuel', 'frame'], inplace=True)
# One facet per region, one line per fuel, coloured with the palette `a`
g = sns.FacetGrid(data, hue='fuel', col='region',
col_order=['SPP', 'TRE', 'MRO'], palette=a)
g.map(plt.plot, 'frame', 'generation').add_legend()
g.set_axis_labels('Timeframe', 'Variability')
axes = g.axes.flatten()
# Replace the numeric x positions with the period labels
for ax in axes:
ax.set_xticks([0, 0.5, 1])
ax.set_xticklabels([' 2007 -\n 2009 ', '2010 -\n2012 ',
'2013 - \n2015 '])
ax.set_xlim(-.1, 1.1)
Out[126]:
Out[126]:
Out[126]:
Out[126]:
Out[126]:
Out[126]:
Out[126]:
Out[126]:
Out[126]:
Out[126]:
Out[126]:
In [175]:
annual_gen_index.head()
Out[175]:
In [181]:
annual_gen_index.loc[annual_gen_index['region'] == 'SERC']
Out[181]:
In [177]:
# Percent decline of the annual index between the first and last row of each
# region.
# NOTE(review): assumes the rows of each region are in ascending year order - confirm.
regions = ['USA', 'SPP', 'MRO', 'RFC', 'SERC', 'TRE', 'FRCC', 'WECC', 'NPCC']
for region in regions:
    temp = annual_gen_index.loc[annual_gen_index['region'] == region]
    start_year = temp.loc[:, 'Year'].values[0]
    start = temp.loc[:, 'index (g/kWh)'].values[0]
    end = temp.loc[:, 'index (g/kWh)'].values[-1]
    # Positive when the index fell, so the number agrees with the word
    # "declined"; the original printed a negative percentage for a decline.
    change = (start - end) / start * 100

    # Single-argument print(...) behaves the same under Python 2 and 3
    print('{} declined {:.1f}%, from {:.1f} in {} to {:.1f}'.format(
        region, change, start, start_year, end))
In [186]:
gen_index.loc[gen_index['region'] == 'USA', ['datetime', 'monthly index (g/kWh)']]
Out[186]:
In [187]:
# `ai_fns` was never defined in the notebook, so this cell failed on a fresh
# kernel. Build it the same way as the monthly file lists, using the
# 'Annual index*' pattern that make_gen_index globs for annual data.
ai_fns = glob.glob(os.path.join('Data storage', 'final NERC data',
                                'Annual index*'))

# Read every annual index file, tag it with its region, and stack them
df_list = []
for f in ai_fns:
    # Last whitespace-separated token of the path minus its final four
    # characters (assumes a 4-character extension such as ".csv")
    region = f.split()[-1][:-4]
    df = pd.read_csv(f)
    df['region'] = region
    df_list.append(df)
full_ai = pd.concat(df_list)
full_ai.reset_index(inplace=True, drop=True)
In [190]:
# `ag_fns` was never defined in the notebook, so this cell failed on a fresh
# kernel. Build it the same way as the monthly file lists, using the
# 'Annual gen*' pattern that make_gen_index globs for annual data.
ag_fns = glob.glob(os.path.join('Data storage', 'final NERC data',
                                'Annual gen*'))

# Read every annual generation file, tag it with its region, and stack them
df_list = []
for f in ag_fns:
    # Last whitespace-separated token of the path minus its final four
    # characters (assumes a 4-character extension such as ".csv")
    region = f.split()[-1][:-4]
    df = pd.read_csv(f)
    df['region'] = region
    df_list.append(df)
full_ag = pd.concat(df_list)
full_ag.reset_index(inplace=True, drop=True)

# Wide table: one row per region/year, one column per fuel category
annual_gen = pd.pivot_table(full_ag, index=['region', 'year'],
                            values='generation (MWh)', columns='fuel category')
annual_gen.reset_index(inplace=True, drop=False)
# Fuels with no rows for a region/year become 0
annual_gen.replace(np.nan, 0, inplace=True)
In [198]:
# Attach the annual index to the wide annual generation table (inner join on
# year and region). This rebinds annual_gen_index, whose year column is now
# the lower-case 'year'.
annual_gen_index = annual_gen.merge(
    full_ai[['year', 'region', 'index (g/kWh)']],
    on=['year', 'region'])
annual_gen_index.head()
Out[198]:
In [202]:
# Absolute and percent decline of the annual index for each region, and the
# average decline per year.
# NOTE(review): assumes the rows of each region are in ascending year order - confirm.
regions = ['USA', 'SPP', 'MRO', 'RFC', 'SERC', 'TRE', 'FRCC', 'WECC', 'NPCC']
for region in regions:
    temp = annual_gen_index.loc[annual_gen_index['region'] == region]
    # FRCC, NPCC and USA start from their second row instead of the first
    # NOTE(review): presumably their first year is unreliable - confirm.
    first = 1 if region in ['FRCC', 'NPCC', 'USA'] else 0
    start_year = temp.loc[:, 'year'].values[first]
    start = temp.loc[:, 'index (g/kWh)'].values[first]

    end = temp.loc[:, 'index (g/kWh)'].values[-1]
    # Take the end year from the data instead of hard-coding 2015, so the
    # rate stays right if the final data year changes
    end_year = temp.loc[:, 'year'].values[-1]

    per_change = (start - end) / start * 100
    change = start - end
    rate = change / (end_year - start_year)

    # Single-argument print(...) behaves the same under Python 2 and 3
    print('{} declined {:.1f} ({:.1f}%), from {:.1f} in {} to {:.1f},\n'
          'a rate of {:.1f}'.format(region, change, per_change, start,
                                    start_year, end, rate))
In [ ]: